'''
Exercise: scraper for the ranking lists on the Baidu hot-search page.
'''
from urllib.request import urlopen
from bs4 import BeautifulSoup as bf

# Download and parse the board page once, at import time; the spider_*
# functions read the parsed document from the module-level ``obj``.
# The ``with`` block closes the HTTP response as soon as its body has been
# read, and the timeout (seconds) keeps a stalled connection from blocking
# the import indefinitely.
with urlopen("http://top.baidu.com/board?platform=pc&sa=pcindex_a_right", timeout=10) as html:
    obj = bf(html.read(), 'html.parser')
# Scrape the hot-search board.
def spider_hotsearch_list(soup=None):
    """Print the hot-search board: a heading, then one ``- <title>`` line per entry.

    Args:
        soup: Parsed document to search. When ``None`` (the default) the
            module-level ``obj`` is used, which matches the original
            zero-argument behaviour.
    """
    if soup is None:
        soup = obj
    entries = soup.find_all('div', class_='content-pos_1fT0H')
    print('1、热搜榜：')
    for entry in entries:
        # The title node is reached purely by child position
        # (contents[3] -> contents[1]); presumably the skipped indexes are
        # whitespace/comment nodes -- verify against the live markup.
        print('-', entry.contents[3].contents[1].text)
# Shared implementation for the themed boards (novel, movie, teleplay, car,
# game). They differ only in the ``theme`` attribute of the container ``div``
# and in the heading that is printed.
def _print_category_board(theme, heading, soup=None):
    """Print one themed board as ``name | description | hot-search figure`` rows.

    Args:
        theme: Value of the ``theme`` attribute on the board's container ``div``.
        heading: Line printed before the rows.
        soup: Parsed document to search; the module-level ``obj`` when ``None``.
    """
    if soup is None:
        soup = obj
    board = soup.find('div', theme=theme, class_='category-item_29ePD')
    print(heading)
    if board is None:
        # find() returns None when nothing matches; report that instead of
        # failing with AttributeError on the find_all() call below.
        print('（未找到该榜单）')
        return
    for item in board.find_all('div', class_='right_1PE2e'):
        # Fields are located by child position only; presumably the skipped
        # indexes are whitespace/comment nodes -- verify against live markup.
        name = item.contents[1].text
        hot_search_no = item.contents[3].contents[1].text
        desc = item.contents[5].text
        print(name, '|', desc, '|', hot_search_no)


# Scrape the novel board.
def spider_novel_list(soup=None):
    """Print the novel board (section 2); ``soup`` defaults to the module-level ``obj``."""
    _print_category_board('novel', '2、小说榜：', soup)


# Scrape the movie board.
def spider_movie_list(soup=None):
    """Print the movie board (section 3); ``soup`` defaults to the module-level ``obj``."""
    _print_category_board('movie', '3、电影榜：', soup)


# Scrape the TV-series board.
def spider_teleplay_list(soup=None):
    """Print the TV-series board (section 4); ``soup`` defaults to the module-level ``obj``."""
    _print_category_board('teleplay', '4、电视剧榜：', soup)


# Scrape the car board.
def spider_car_list(soup=None):
    """Print the car board (section 5); ``soup`` defaults to the module-level ``obj``."""
    _print_category_board('car', '5、汽车榜：', soup)


# Scrape the game board.
def spider_game_list(soup=None):
    """Print the game board (section 6); ``soup`` defaults to the module-level ``obj``."""
    _print_category_board('game', '6、游戏榜：', soup)

if __name__ == "__main__":
    # Script entry point: print the banner, then each board in its numbered
    # order (hot search, novel, movie, TV series, car, game).
    print('爬虫程序')
    for show_board in (
        spider_hotsearch_list,
        spider_novel_list,
        spider_movie_list,
        spider_teleplay_list,
        spider_car_list,
        spider_game_list,
    ):
        show_board()
